Installation

  1. Install R
  2. Install RTools if you are on Windows
  3. Install RStudio

For more details, see Software and Package Versions.

Running This Code

  1. Ensure the installation steps above are completed
  2. Download a zip of the code and data here and unzip it
  3. In RStudio, open the src/src.Rproj file
  4. Then, open the src/index.Rmd file
  5. In RStudio:
    • Run all code: Click the Run drop down (top right of the code pane) and click Run All
    • Generate HTML version: Click knit (top left of code pane) and a file will be generated in docs/index.html

Libraries

Install R packages if needed.

# Required packages
required_packages <- c(
    "rmarkdown",
    "bookdown",
    "knitr",
    "tidyverse",
    "purrr",
    "glue",
    "lubridate",
    "scales",
    "patchwork",
    "DiagrammeR",
    "DiagrammeRsvg",
    "webshot2",
    "magick",
    "rsvg",
    "sf",
    "tmap",
    "ggspatial",
    "prettymapr",
    "units",
    "boot"
)

# Try to install packages if not installed
default_options <- options()
tryCatch(
    {
        # Disable interactivity
        options(install.packages.compile.from.source = "always")
        
        # Install package if not installed
        for (package in required_packages) {
            is_package_installed <- require(package, character.only = TRUE)
            if (!is_package_installed) {
                cat(paste0("Installing package: ", package, "\n"))
                install.packages(package)
            } else {
                cat(paste0("Package already installed: ", package, "\n"))
            }
        }
    },
    error = function(cond) {
        stop(cond)
    },
    finally = {
        options(default_options) # reset interactivity
    }
)

Load R libraries.

library(boot)
library(DiagrammeR)
library(ggplot2)
library(ggspatial)
library(glue)
library(lubridate)
library(patchwork)
library(sf)
library(tidyverse)
library(tmap)

Data

Read data from the data folder.

ddesc <- read_csv("../data/data.csv")
ddesc

Vancouver Bikeways

Bikeways data with manually verified (Google Street View/Earth and Web Search) painted lanes and cycle tracks for Vancouver, Canada

# Read data
vancbike_raw <- read_sf("../data/vancouver-bikeways-2024-06-02.geojson")

# Get download date
vancbike_dldate <- ddesc %>% filter(
    file == "vancouver-bikeways-2024-06-02.geojson"
) %>% pull(download_date)

Map

Only the first 1000 records are shown.

tmap_mode("view")
tm_shape(vancbike_raw %>% head(1000)) +
    tm_lines(
        col = "#336699",
        border.col = "white",
        popup.vars = TRUE
    )

Data

  • Columns: 23
  • Rows: 3666
vancbike_raw %>% as_tibble

Dictionary

The data contains the following columns:

#vancbike_ddict <- read_csv("../data/vancouver-bikeways-2024-06-02-datadict.csv")
#vancbike_ddict

Details

print(vancbike_raw)
## Simple feature collection with 3666 features and 22 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -123.2238 ymin: 49.19899 xmax: -123.0233 ymax: 49.31428
## Geodetic CRS:  WGS 84
## # A tibble: 3,666 × 23
##    id     street     status road_type road_type_recode install_year install_type
##    <chr>  <chr>      <chr>  <chr>     <chr>                   <dbl> <chr>       
##  1 294725 Highbury   Active Resident… Local                    2006 Local Street
##  2 294726 Highbury   Active Resident… Local                    2006 Local Street
##  3 294731 W 8th Ave  Active Resident… Local                    1994 Local Street
##  4 294732 W 8th Ave  Active Resident… Local                    1994 Local Street
##  5 294733 Off Street Active Lane      Local                    2003 Protected B…
##  6 294736 W 5th Ave  Active Resident… Local                    2009 Local Street
##  7 294737 W 8th Ave  Active Resident… Local                    1994 Local Street
##  8 294738 W 7th Ave  Active Resident… Local                    1994 Local Street
##  9 294739 W 7th Ave  Active Resident… Local                    1994 Local Street
## 10 294742 W 7th Ave  Active Resident… Local                    1994 Local Street
## # ℹ 3,656 more rows
## # ℹ 16 more variables: verify_install_year <dbl>, verify_install_date <chr>,
## #   verify_install_type <chr>, verify_install_comment <chr>,
## #   verify_upgrade1_year <dbl>, verify_upgrade1_date <chr>,
## #   verify_upgrade1_type <chr>, verify_upgrade1_comment <chr>,
## #   verify_upgrade2_year <dbl>, verify_upgrade2_date <chr>,
## #   verify_upgrade2_type <chr>, verify_upgrade2_comment <chr>, …

Calgary Bikeways

Bikeways data with manually verified (Google Street View/Earth and Web Search) painted lanes and cycle tracks for Calgary, Canada

# Read data
calgbike_raw <- read_sf("../data/calgary-bikeways-2024-06-05.geojson")

# Get download date
calgbike_dldate <- ddesc %>% filter(
    file == "calgary-bikeways-2024-06-05.geojson"
) %>% pull(download_date)

Map

Only the first 1000 records are shown.

tmap_mode("view")
tm_shape(calgbike_raw %>% head(1000)) +
    tm_lines(
        col = "#336699",
        border.col = "white",
        popup.vars = TRUE
    )

Data

  • Columns: 22
  • Rows: 4169
calgbike_raw %>% as_tibble

Dictionary

The data contains the following columns:

#calgbike_ddict <- read_csv("../data/calgary-bikeways-2024-06-05-datadict.csv")
#calgbike_ddict

Details

print(calgbike_raw)
## Simple feature collection with 4169 features and 21 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -114.269 ymin: 50.89762 xmax: -113.9302 ymax: 51.17778
## Geodetic CRS:  WGS 84
## # A tibble: 4,169 × 22
##    id    street status   road_type road_type_recode install_year install_type   
##    <chr> <chr>  <chr>    <chr>     <chr>                   <dbl> <chr>          
##  1 1     <NA>   EXISTING <NA>      <NA>                     2003 On-Street Bike…
##  2 2     <NA>   EXISTING <NA>      <NA>                     2009 On-Street Bike…
##  3 3     <NA>   EXISTING <NA>      <NA>                     2009 On-Street Bike…
##  4 4     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  5 5     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  6 6     <NA>   EXISTING <NA>      <NA>                     2005 On-Street Bike…
##  7 7     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  8 8     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
##  9 9     <NA>   EXISTING <NA>      <NA>                     1999 On-Street Bike…
## 10 10    <NA>   INACTIVE <NA>      <NA>                       NA DECOMMISSIONED 
## # ℹ 4,159 more rows
## # ℹ 15 more variables: verify_install_year <dbl>, verify_install_date <chr>,
## #   verify_install_type <chr>, verify_install_comment <chr>,
## #   verify_upgrade1_year <dbl>, verify_upgrade1_date <chr>,
## #   verify_upgrade1_type <chr>, verify_upgrade1_comment <chr>,
## #   verify_upgrade2_year <dbl>, verify_upgrade2_date <chr>,
## #   verify_upgrade2_type <chr>, verify_upgrade2_comment <chr>, …

Toronto Bikeways

Bikeways data with manually verified (Google Street View/Earth and Web Search) painted lanes and cycle tracks for Toronto, Canada

# Read data
toronbike_raw <- read_sf("../data/toronto-bikeways-2024-06-02.geojson")

# Get download date
toronbike_dldate <- ddesc %>% filter(
    file == "toronto-bikeways-2024-06-02.geojson"
) %>% pull(download_date)

Map

Only the first 1000 records are shown.

tmap_mode("view")
tm_shape(toronbike_raw %>% head(1000)) +
    tm_lines(
        col = "#336699",
        border.col = "white",
        popup.vars = TRUE
    )

Data

  • Columns: 23
  • Rows: 1323
toronbike_raw %>% as_tibble

Dictionary

The data contains the following columns:

#toronbike_ddict <- read_csv("../data/toronto-bikeways-2024-06-02-datadict.csv")
#toronbike_ddict

Details

print(toronbike_raw)
## Simple feature collection with 1323 features and 22 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -79.63039 ymin: 43.58221 xmax: -79.11803 ymax: 43.85546
## Geodetic CRS:  WGS 84
## # A tibble: 1,323 × 23
##    id    street    street_from street_to road_type road_type_recode install_year
##    <chr> <chr>     <chr>       <chr>     <chr>     <chr>                   <dbl>
##  1 8     Bloor St… Parliament… Castle F… Major Ar… Arterial                 2001
##  2 17    Lake Sho… Humber Bay… Humber B… Major Ar… Arterial                 2001
##  3 18    Lake Sho… 37 M E Fle… Humber B… Major Ar… Arterial                 2001
##  4 19    Lake Sho… 50.7 M E L… 37 M E F… Major Ar… Arterial                 2001
##  5 38    Queens Q… Martin Goo… Bathurst… Collector Collector                2001
##  6 39    Davenpor… Cottingham… Macphers… Minor Ar… Arterial                 2001
##  7 40    Elizabet… College St  Gerrard … Collector Collector                2001
##  8 41    Gerrard … Yonge St    Church St Minor Ar… Arterial                 2001
##  9 42    Macphers… Davenport … Poplar P… Collector Collector                2001
## 10 43    Lake Sho… Marine Par… Palace P… Major Ar… Arterial                 2001
## # ℹ 1,313 more rows
## # ℹ 16 more variables: install_type <chr>, verify_install_year <dbl>,
## #   verify_install_date <chr>, verify_install_type <chr>,
## #   verify_install_comment <chr>, verify_upgrade1_year <dbl>,
## #   verify_upgrade1_date <chr>, verify_upgrade1_type <chr>,
## #   verify_upgrade1_comment <chr>, verify_upgrade2_year <dbl>,
## #   verify_upgrade2_date <chr>, verify_upgrade2_type <chr>, …

Verified Dates

  • Download Link:
  • Download Date:
  • Data Updated:
  • Notes:
# Read data
vdates_raw <- read_csv("../data/verify-dates-2024-06-07.csv")

# Get download date
vdates_dldate <- ddesc %>% filter(
    file == "verify-dates-2024-06-07.csv"
) %>% pull(download_date)

Data

  • Columns: 8
  • Rows: 298
vdates_raw

Dictionary

The data contains the following columns:

vdates_ddict <- read_csv("../data/verify-dates-2024-06-07-datadict.csv")
vdates_ddict

Files

The data files are available below:

Toronto KSI

KSI (2006-2022) data from the Toronto Police Service (TPS) Public Safety Data Portal for Toronto, Ontario

# Read data
ksi_raw <- read_sf("../data/toronto-ksi-2024-06-01.geojson")

# Get download date
ksi_dldate <- ddesc %>% filter(
    file == "toronto-ksi-2024-06-01.geojson"
) %>% pull(download_date)

Map

Note: Due to the large number of records, only the latest year of 2023 is displayed (n = 695).

tmap_mode("view")
tm_shape(ksi_raw %>% filter(year(DATE) == max(year(DATE)))) +
    tm_dots(
        col = "ACCLASS",
        clustering = TRUE,
        popup.vars = TRUE
    )

Data

  • Columns: 53
  • Rows: 18957
ksi_raw %>% as_tibble()

Dictionary

The data contains the following columns:

ksi_ddict <- read_csv("../data/toronto-ksi-2024-06-01-datadict.csv")
ksi_ddict

Details

print(ksi_raw)
## Simple feature collection with 18957 features and 52 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.63839 ymin: 43.58968 xmax: -79.12297 ymax: 43.85545
## Geodetic CRS:  WGS 84
## # A tibble: 18,957 × 53
##    OBJECTID INDEX_  ACCNUM DATE                TIME  STREET1      STREET2 OFFSET
##       <int> <chr>   <chr>  <dttm>              <chr> <chr>        <chr>   <chr> 
##  1        1 3389067 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  2        2 3389068 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  3        3 3389069 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  4        4 3389070 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  5        5 3389071 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  6        6 3389072 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  7        7 3389073 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  8        8 3389074 893184 2006-01-01 05:00:00 236   WOODBINE AVE O CONN… <NA>  
##  9        9 3433023 909646 2006-01-01 05:00:00 315   DANFORTH AVE WEST L… <NA>  
## 10       10 3433024 909646 2006-01-01 05:00:00 315   DANFORTH AVE WEST L… <NA>  
## # ℹ 18,947 more rows
## # ℹ 45 more variables: ROAD_CLASS <chr>, DISTRICT <chr>, LATITUDE <dbl>,
## #   LONGITUDE <dbl>, ACCLOC <chr>, TRAFFCTL <chr>, VISIBILITY <chr>,
## #   LIGHT <chr>, RDSFCOND <chr>, ACCLASS <chr>, IMPACTYPE <chr>, INVTYPE <chr>,
## #   INVAGE <chr>, INJURY <chr>, FATAL_NO <int>, INITDIR <chr>, VEHTYPE <chr>,
## #   MANOEUVER <chr>, DRIVACT <chr>, DRIVCOND <chr>, PEDTYPE <chr>,
## #   PEDACT <chr>, PEDCOND <chr>, CYCLISTYPE <chr>, CYCACT <chr>, …

Files

The data files are available below:

Cleaning

Combine Bikeways

Combine bikeway data across all cities.

# List of city bikeway data
bike_list <- list(
    vancouver = vancbike_raw,
    calgary = calgbike_raw %>%
        mutate(no_verify_install_type = NA),
    toronto = toronbike_raw %>%
        mutate(no_verify_install_type = NA)
)

# Get common columns across all city bikeways
bike_cols <- bike_list %>%
    map(colnames) %>%
    reduce(intersect) %>%
    c("no_verify_install_type") # include vanc lsbs

# Combine bikeway data across cities
bike <- names(bike_list) %>%
    map(function(city) {
        bike_list[[city]] %>%
            select(
                all_of(bike_cols)
            ) %>%
            mutate(
                city = factor(city, levels = names(bike_list)),
                .before = 1
            )
    }) %>%
    reduce(add_row)

# Display total records per city
clean_nrow <- bike %>%
    as_tibble %>%
    group_by(city) %>%
    summarize( # total segments per city
        segments = n()
    ) %>%
    ungroup %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(
        `Total Segments` = segments
    ) %>%
    rename_with(
        ~str_to_title(.)
    )
clean_nrow

Bikeway types include:

# Get all bikeway types
bike_types <- unique(bike$install_type)

# Display non-verified bikeways
cat(paste0("\n* ", bike_types, collapse = ""))
  • Local Street
  • Protected Bike Lanes
  • Painted Lanes
  • Shared Lanes
  • On-Street Bikeway
  • DECOMMISSIONED
  • Shared Lane
  • Bicycle Lane
  • TEMPORARY
  • Neighbourhood Greenway
  • Cycle Track
  • On-Street BIkeway
  • Bike Lane
  • Bi-Directional Cycle Track
  • Bike Lane - Buffered
  • Bike Lane - Contraflow
  • Cycle Track - Contraflow
  • Sharrows - Wayfinding
  • Multi-Use Trail
  • Multi-Use Trail - Entrance
  • Park Road
  • Sharrows
  • Signed Route (No Pavement Markings)
  • Multi-Use Trail - Existing Connector
  • Multi-Use Trail - Boulevard
  • Multi-Use Trail - Connector
  • Sharrows - Arterial - Connector

Filter Verified Bikeways

Filter for bikeways with verified installations and upgrades.

# Filter verified bikeways
vbike <- bike %>%
    filter(
        (!is.na(verify_install_year) |
        !is.na(verify_upgrade1_year) |
        !is.na(verify_upgrade2_year)) &
        is.na(no_verify_install_type)
    )

# Count verified bikeways
clean_nrow <- vbike %>%
    as_tibble %>%
    group_by(city) %>%
    summarize( # total verified segments per city
        segments = n()
    ) %>%
    ungroup %>%
    add_row( # total verified segment counts
        city = "all",
        segments = sum(.$segments)
    ) %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(`Total Verified` = segments) %>%
    rename_with(
        ~str_to_title(.)
    ) %>%
    left_join(clean_nrow, by = "City") %>%
    mutate(
        `Total Verified %` = `Total Verified` / `Total Segments` * 100
    ) %>%
    relocate(
        "Total Verified",
        "Total Verified %",
        .after = "City"
    ) %>%
    rename_with(
        ~gsub("%", "% of Total Segments", .),
        starts_with("Total Verified")
    )
clean_nrow

Verified bikeway types include:

# Get all bikeway types
vbike_types <- unique(vbike$install_type)

# Display verified bikeways
cat(paste0("\n* ", vbike_types, collapse = ""))
  • Painted Lanes
  • Protected Bike Lanes
  • Bicycle Lane
  • On-Street Bikeway
  • Cycle Track
  • Bike Lane
  • Bi-Directional Cycle Track
  • Bike Lane - Buffered
  • Bike Lane - Contraflow
  • Cycle Track - Contraflow

Filter Post-2018 Bikeways

Filter for bikeways that were installed or upgraded after 2018.

# Filter ver install/upgrades 2019 or later only
vbike <- vbike %>%
    filter(
        verify_install_year > 2018 |
        verify_upgrade1_year > 2018 |
        verify_upgrade2_year > 2018
    )

# Count verified bikeways
clean_nrow <- vbike %>%
    as_tibble %>%
    group_by(city) %>%
    summarize( # total verified segments per city
        segments = n()
    ) %>%
    ungroup %>%
    add_row( # total verified segment counts
        city = "all",
        segments = sum(.$segments)
    ) %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(`Verified Post-2018` = segments) %>%
    rename_with(
        ~str_to_title(.)
    ) %>%
    left_join(clean_nrow, by = "City") %>%
    mutate(
        `Verified Post-2018 %` = `Verified Post-2018` / `Total Verified` * 100
    ) %>%
    relocate(
        "Verified Post-2018",
        "Verified Post-2018 %",
        .after = "City"
    ) %>%
    rename_with(
        ~gsub("%", "% of Total Verified", .),
        starts_with("Verified Post-2018")
    )
clean_nrow

Filter Verified with Dates

Filter for bikeways with verified install/upgrade dates only.

# Filter ver install/upgrades only
vbike <- vbike %>%
    filter(
        !is.na(verify_install_date) |
        !is.na(verify_upgrade2_date) |
        !is.na(verify_upgrade2_date)
    )

# Count verified bikeways with dates
clean_nrow <- vbike %>%
    as_tibble %>%
    group_by(city) %>%
    summarize( # total verified per city
        segments = n()
    ) %>%
    ungroup %>%
    add_row( # total segment counts
        city = "all",
        segments = sum(.$segments)
    ) %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(`Verified Dated` = segments) %>%
    rename_with(
        ~str_to_title(.)
    ) %>%
    left_join(clean_nrow, by = "City") %>%
    mutate(
        `Verified Dated %` = `Verified Dated` / `Verified Post-2018` * 100
    ) %>%
    relocate(
        "Verified Dated",
        "Verified Dated %",
        .after = "City"
    ) %>%
    rename_with(
        ~gsub("%", "% of Verified Post-2018", .),
        starts_with("Verified Dated")
    )
clean_nrow

Pivot Long and Remove Undated

Pivot to long format, where each record represents an installed (install), first upgrade (upgrade1), or second upgrade (upgrade2) verified bikeway segment.

Also remove any segments without dates.

# Pivot to longer format
lbike <- vbike %>%
    pivot_longer(
        cols = ends_with("_date"),
        names_to = "verify_event_type",
        values_to = "verify_date_raw"
    ) %>%
    mutate( # convert events to install, upgrade1, upgrade2
        verify_event_type = str_replace_all(
            verify_event_type,
            c("verify_" = "", "_date" = "")
        )
    ) %>%
    filter(!is.na(verify_date_raw))

# Count dated installs and upgrades
clean_nrow <- lbike %>%
    as_tibble %>%
    group_by(city, verify_event_type) %>%
    summarize(segments = n()) %>%
    ungroup %>%
    pivot_wider(
        names_from = verify_event_type,
        values_from = segments
    ) %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(
        `Dated Installs` = install,
        `Dated Upgrades (1st)` = upgrade1,
        `Dated Upgrades (2nd)` = upgrade2
    ) %>%
    rename_with(
        ~str_to_title(.)
    ) %>%
    left_join(
        clean_nrow,
        by = "City"
    ) %>%
    mutate(
        `Dated Installs %` = `Dated Installs` / `Verified Post-2018` * 100,
        `Dated Upgrades % (1st)` = `Dated Upgrades (1st)` / `Verified Post-2018` * 100,
        `Dated Upgrades % (2nd)` = `Dated Upgrades (2nd)` / `Verified Post-2018` * 100
    ) %>%
    relocate(
        "Dated Installs",
        "Dated Installs %",
        "Dated Upgrades (1st)",
        "Dated Upgrades % (1st)",
        "Dated Upgrades (2nd)",
        "Dated Upgrades % (2nd)",
        .after = "City"
    ) %>%
    rename_with(
        ~gsub("%", "% of Verified Post-2018", .),
        starts_with(c("Dated Installs", "Dated Upgrades"))
    )
clean_nrow

Clean Verified Dates

Clean manually entered ambiguous verified install/upgrade dates that had various time units (e.g. days, months, quarters, semesters, etc).

# Clean verified dates using join
lbike <- lbike %>%
    left_join( # join cleaned dates to raw dates
        vdates_raw,
        by = "verify_date_raw"
    ) %>%
    filter( # remove with no clean dates
        !is.na(verify_date_type)
    )

# Count clean dated installs and upgrades
clean_nrow <- lbike %>%
    as_tibble %>%
    group_by(city, verify_event_type) %>%
    summarize(segments = n()) %>%
    ungroup %>%
    pivot_wider(
        names_from = verify_event_type,
        values_from = segments
    ) %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(
        `Clean Dated Installs` = install,
        `Clean Dated Upgrades (1st)` = upgrade1,
        `Clean Dated Upgrades (2nd)` = upgrade2
    ) %>%
    rename_with(
        ~str_to_title(.)
    ) %>%
    left_join(
        clean_nrow,
        by = "City"
    ) %>%
    mutate(
        `Clean Dated Installs %` = `Clean Dated Installs` / `Verified Post-2018` * 100,
        `Clean Dated Upgrades % (1st)` = `Clean Dated Upgrades (1st)` / `Verified Post-2018` * 100,
        `Clean Dated Upgrades % (2nd)` = `Clean Dated Upgrades (2nd)` / `Verified Post-2018` * 100
    ) %>%
    relocate(
        "Clean Dated Installs",
        "Clean Dated Installs %",
        "Clean Dated Upgrades (1st)",
        "Clean Dated Upgrades % (1st)",
        "Clean Dated Upgrades (2nd)",
        "Clean Dated Upgrades % (2nd)",
        .after = "City"
    ) %>%
    rename_with(
        ~gsub("%", "% of Verified Post-2018", .),
        starts_with("Clean Dated")
    )
clean_nrow

Determine Unit of Time

Determine the unit of time based on the amount of data available per time unit.

Use a unit of time other than year.

# Classify as month, quarter, biyear, triyear
lbike <- lbike %>%
    mutate(
        verify_date_month = case_when(
            verify_date_type %in% c("day", "month") ~ month(verify_date, label = T, abbr = F)
        ),
        verify_date_quarter = case_when( # classify Q1, Q2, Q3, Q4
            month(verify_date) %in% 1:3 |
            (
                month(verify_date_start) %in% 1:3 &
                month(verify_date_end) %in% 1:3 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Q1",
            month(verify_date) %in% 4:6 |
            (
                month(verify_date_start) %in% 4:6 &
                month(verify_date_end) %in% 4:6 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Q2",
            month(verify_date) %in% 1:3 |
            (
                month(verify_date_start) %in% 7:9 &
                month(verify_date_end) %in% 7:9 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Q3",
            month(verify_date) %in% 10:12 |
            (
                month(verify_date_start) %in% 10:12 &
                month(verify_date_end) %in% 10:12 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Q4"
        ),
        verify_date_triyear = case_when( # classify Fall, Winter, Spring/Summer
            month(verify_date) %in% 1:4 |
            (
                month(verify_date_start) %in% 1:4 &
                month(verify_date_end) %in% 1:4 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Winter",
            month(verify_date) %in% 5:8 |
            (
                month(verify_date_start) %in% 5:8 &
                month(verify_date_end) %in% 5:8 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Spring/Summer",
            month(verify_date) %in% 9:12 |
            (
                month(verify_date_start) %in% 9:12 &
                month(verify_date_end) %in% 9:12 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "Fall"
        ),
        verify_date_biyear = case_when( # classify 1st Half, 2nd Half
            month(verify_date) %in% 1:6 |
            (
                month(verify_date_start) %in% 1:6 &
                month(verify_date_end) %in% 1:6 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "1st Half",
            month(verify_date) %in% 7:12 |
            (
                month(verify_date_start) %in% 7:12 &
                month(verify_date_end) %in% 7:12 &
                year(verify_date_start) == year(verify_date_end)
            ) ~ "2nd Half"
        )
    )

# Count records per time units
clean_nrow <- lbike %>%
    as_tibble %>%
    group_by(city, verify_event_type) %>%
    summarize(
        `month` = sum(!is.na(verify_date_month), na.rm = T),
        `quarter` = sum(!is.na(verify_date_quarter), na.rm = T),
        `triyear` = sum(!is.na(verify_date_triyear), na.rm = T),
        `biyear` = sum(!is.na(verify_date_biyear), na.rm = T),
    ) %>%
    ungroup %>%
    pivot_wider(
        names_from = verify_event_type,
        values_from = -c(city, verify_event_type),
        names_sep = "_"
    ) %>%
    mutate(
        city = str_to_title(city)
    ) %>%
    rename(
        `Monthly Installs` = month_install,
        `Monthly Upgrades (1st)` = month_upgrade1,
        `Monthly Upgrades (2nd)` = month_upgrade2,
        `Quarterly Installs` = quarter_install,
        `Quarterly Upgrades (1st)` = quarter_upgrade1,
        `Quarterly Upgrades (2nd)` = quarter_upgrade2,
        `Triyearly Installs` = triyear_install,
        `Triyearly Upgrades (1st)` = triyear_upgrade1,
        `Triyearly Upgrades (2nd)` = triyear_upgrade2,
        `Biyearly Installs` = biyear_install,
        `Biyearly Upgrades (1st)` = biyear_upgrade1,
        `Biyearly Upgrades (2nd)` = biyear_upgrade2
    ) %>%
    rename_with(
        ~str_to_title(.)
    ) %>%
    left_join(
        clean_nrow,
        by = "City"
    ) %>%
    mutate(
        `Monthly Installs %` = `Monthly Installs` / `Verified Post-2018` * 100,
        `Monthly Upgrades % (1st)` = `Monthly Upgrades (1st)` / `Verified Post-2018` * 100,
        `Monthly Upgrades % (2nd)` = `Monthly Upgrades (2nd)` / `Verified Post-2018` * 100,
        `Quarterly Installs %` = `Quarterly Installs` / `Verified Post-2018` * 100,
        `Quarterly Upgrades % (1st)` = `Quarterly Upgrades (1st)` / `Verified Post-2018` * 100,
        `Quarterly Upgrades % (2nd)` = `Quarterly Upgrades (2nd)` / `Verified Post-2018` * 100,
        `Triyearly Installs %` = `Triyearly Installs` / `Verified Post-2018` * 100,
        `Triyearly Upgrades % (1st)` = `Triyearly Upgrades (1st)` / `Verified Post-2018` * 100,
        `Triyearly Upgrades % (2nd)` = `Triyearly Upgrades (2nd)` / `Verified Post-2018` * 100,
        `Biyearly Installs %` = `Biyearly Installs` / `Verified Post-2018` * 100,
        `Biyearly Upgrades % (1st)` = `Biyearly Upgrades (1st)` / `Verified Post-2018` * 100,
        `Biyearly Upgrades % (2nd)` = `Biyearly Upgrades (2nd)` / `Verified Post-2018` * 100,
    ) %>%
    relocate(
        "Monthly Installs",
        "Monthly Installs %",
        "Monthly Upgrades (1st)",
        "Monthly Upgrades % (1st)",
        "Monthly Upgrades (2nd)",
        "Monthly Upgrades % (2nd)",
        "Quarterly Installs",
        "Quarterly Installs %",
        "Quarterly Upgrades (1st)",
        "Quarterly Upgrades % (1st)",
        "Quarterly Upgrades (2nd)",
        "Quarterly Upgrades % (2nd)",
        "Triyearly Installs",
        "Triyearly Installs %",
        "Triyearly Upgrades (1st)",
        "Triyearly Upgrades % (1st)",
        "Triyearly Upgrades (2nd)",
        "Triyearly Upgrades % (2nd)",
        "Biyearly Installs",
        "Biyearly Installs %",
        "Biyearly Upgrades (1st)",
        "Biyearly Upgrades % (1st)",
        "Biyearly Upgrades (2nd)",
        "Biyearly Upgrades % (2nd)",
        .after = "City"
    ) %>%
    rename_with(
        ~gsub("%", "% of Verified Post-2018", .),
        starts_with(c("Monthly", "Quarterly", "Triyearly", "Biyearly"))
    )
clean_nrow
unit_plot_data <- clean_nrow %>%
    select(
        "City",
        "Monthly Installs % of Verified Post-2018",
        "Monthly Upgrades % of Verified Post-2018 (1st)",
        "Monthly Upgrades % of Verified Post-2018 (2nd)",
        "Quarterly Installs % of Verified Post-2018",
        "Quarterly Upgrades % of Verified Post-2018 (1st)",
        "Quarterly Upgrades % of Verified Post-2018 (2nd)",
        "Triyearly Installs % of Verified Post-2018",
        "Triyearly Upgrades % of Verified Post-2018 (1st)",
        "Triyearly Upgrades (2nd)",
        "Biyearly Installs % of Verified Post-2018",
        "Biyearly Upgrades % of Verified Post-2018 (1st)",
        "Biyearly Upgrades % of Verified Post-2018 (2nd)"
    ) %>%
    pivot_longer(
        cols = -City,
        names_to = "Column",
        values_to = "% of Verified Post-2018 Segments"
    ) %>%
    mutate(
        Type = case_when(
            str_detect(Column, "Installs") ~ "Installs",
            str_detect(Column, "\\(1st\\)") ~ "Upgrades (1st)",
            str_detect(Column, "\\(2nd\\)") ~ "Upgrades (2nd)"
        ),
        `Unit of Time` = case_when(
            str_starts(Column, "Monthly") ~ "Monthly",
            str_starts(Column, "Quarterly") ~ "Quarterly",
            str_starts(Column, "Triyearly") ~ "Triyearly",
            str_starts(Column, "Biyearly") ~ "Biyearly"
        ),
        `Unit of Time` = factor(
            `Unit of Time`,
            levels = rev(c(
                "Monthly",
                "Quarterly",
                "Triyearly",
                "Biyearly"
            ))
        )
    )

ggplot(
    unit_plot_data,
    aes(
        x = City,
        y = `% of Verified Post-2018 Segments`,
        group = `Unit of Time`,
        fill = `Unit of Time`
    )
) +
    geom_bar(
        stat = "identity",
        position = "dodge"
    ) +
    facet_wrap(
        ~Type,
        ncol = 1
    ) +
    scale_fill_discrete(
        guide = guide_legend(reverse = TRUE)
    ) +
    theme_minimal() +
    theme(
        legend.position = "top"
    ) +
    coord_flip()